Project 1

setup

Part 1

#1. Histogram of ratings

# Attach the tidyverse (ggplot2, dplyr, ...) and the project-relative path
# helper used to locate the data file.
library(tidyverse)
library(here)

# Read the chocolate data from data/chocolate.RDS.
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Histogram of the `rating` column, drawn with 15 bins.
chocolate %>%
  ggplot(aes(x = rating)) +
  geom_histogram(bins = 15) +
  ggtitle("Histogram of chocolate bar ratings")

Increasing the number of bins allows more granularity of data, better showing the shape of the distribution. However, if we increase the number of bins too much we start to have gaps in our data because the number of bins exceeds the number of potential scores. I selected a 15 bin histogram because it nicely shows the shape of the distribution but doesn’t have the gapping a 20+ bin histogram does.

#2. Number of ratings by country of bean origin

library(dplyr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Tally the rows for each country of bean origin, largest count first.
# The result is deliberately still called `count`; R resolves the call to
# dplyr's count() function even though a data object shares the name.
count <- count(chocolate, country_of_bean_origin, sort = TRUE)

# n = Inf prints every row, so the full table is shown without hard-coding
# how many countries the data happens to contain.
print(count, n = Inf)
## # A tibble: 62 × 2
##    country_of_bean_origin     n
##    <chr>                  <int>
##  1 Venezuela                253
##  2 Peru                     244
##  3 Dominican Republic       226
##  4 Ecuador                  219
##  5 Madagascar               177
##  6 Blend                    156
##  7 Nicaragua                100
##  8 Bolivia                   80
##  9 Colombia                  79
## 10 Tanzania                  79
## 11 Brazil                    78
## 12 Belize                    76
## 13 Vietnam                   73
## 14 Guatemala                 62
## 15 Mexico                    55
## 16 Papua New Guinea          50
## 17 Costa Rica                43
## 18 Trinidad                  42
## 19 Ghana                     41
## 20 India                     35
## 21 U.S.A.                    33
## 22 Haiti                     30
## 23 Honduras                  25
## 24 Jamaica                   24
## 25 Philippines               24
## 26 Indonesia                 20
## 27 Grenada                   19
## 28 Uganda                    19
## 29 Fiji                      16
## 30 Sao Tome                  14
## 31 Vanuatu                   13
## 32 Cuba                      12
## 33 Congo                     11
## 34 Solomon Islands           10
## 35 St. Lucia                 10
## 36 Panama                     9
## 37 Malaysia                   8
## 38 Ivory Coast                7
## 39 Puerto Rico                7
## 40 El Salvador                6
## 41 Thailand                   5
## 42 Sierra Leone               4
## 43 Australia                  3
## 44 Cameroon                   3
## 45 Liberia                    3
## 46 Nigeria                    3
## 47 Samoa                      3
## 48 Togo                       3
## 49 Sao Tome & Principe        2
## 50 Sri Lanka                  2
## 51 Taiwan                     2
## 52 Tobago                     2
## 53 Burma                      1
## 54 China                      1
## 55 DR Congo                   1
## 56 Gabon                      1
## 57 Martinique                 1
## 58 Principe                   1
## 59 St.Vincent-Grenadines      1
## 60 Sulawesi                   1
## 61 Sumatra                    1
## 62 Suriname                   1

The number of ratings for each country of bean origin is shown in the table above.

#3. Average ratings for Ecuadorian beans

library(dplyr)
library(knitr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# One-row summary of the ratings for bars made from Ecuadorian beans:
#   mean  - average rating (missing ratings ignored)
#   sd    - standard deviation of the ratings (missing ratings ignored)
#   total - number of non-missing ratings
# summarise() collapses the filtered rows directly, so there is no need to
# add the statistics to every row and then keep only the first one.
ecud <- chocolate %>%
  filter(country_of_bean_origin == "Ecuador") %>%
  summarise(
    mean = mean(rating, na.rm = TRUE),
    sd = sd(rating, na.rm = TRUE),
    total = sum(!is.na(rating))
  )

# Render the summary as a pipe-format (markdown) table.
knitr::kable(ecud, format = "pipe")
mean sd total
3.164384 0.5122678 219

Average ratings, standard deviation of ratings and total number of ratings for Ecuadorian chocolates

#4.Best manufacturer

library(dplyr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Keep only the bars made from Ecuadorian beans.
ecud <- filter(chocolate, country_of_bean_origin == "Ecuador")

# Average rating per manufacturer, sorted from highest to lowest so the
# best-rated manufacturer(s) appear in the first rows of the printed table.
# Without the sort the rows come out in manufacturer-name order, and the
# first row is not the top-rated manufacturer.
company <- ecud %>%
  group_by(company_manufacturer) %>%
  summarize(rating = mean(rating, na.rm = TRUE)) %>%
  arrange(desc(rating))
company
## # A tibble: 136 × 2
##    company_manufacturer  rating
##    <chr>                  <dbl>
##  1 A. Morin                3.75
##  2 Aequare (Gianduja)      2.88
##  3 Alexandre               3.5 
##  4 Altus aka Cao Artisan   2.75
##  5 Amano                   4   
##  6 Amatller (Simon Coll)   2.75
##  7 Amedei                  3   
##  8 Ara                     2.75
##  9 Arete                   3.44
## 10 Askinosie               3   
## # ℹ 126 more rows

Of the Ecuadorian bean chocolates, A. Morin is the manufacturer with the highest average rating.

#5.Average rating by country of origin

chocolate <- readRDS(here("data", "chocolate.RDS"))

# Mean rating for each country of bean origin, best-rated first.
country <- chocolate %>%
  group_by(country_of_bean_origin) %>%
  summarize(rating = mean(rating, na.rm = TRUE)) %>%
  arrange(desc(rating))

# Show the first six rows (both columns) as a pipe-format table.
country %>%
  select(1:2) %>%
  head() %>%
  knitr::kable("pipe")
country_of_bean_origin rating
Tobago 3.625000
China 3.500000
Sao Tome & Principe 3.500000
Solomon Islands 3.450000
Congo 3.318182
Thailand 3.300000

Tobago (3.63), China (3.50), and Sao Tome & Principe (3.50) are the countries of origin that have the highest average ratings.

#6.Average rating by country of origin in countries with 10+ ratings

library(dplyr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Group the bars by country of bean origin.
country <- chocolate %>% group_by(country_of_bean_origin)

# Keep only the origins with at least 10 rows, then compute each remaining
# origin's mean rating and order the result from highest to lowest.
country_n <- country %>%
  filter(n() >= 10) %>%
  summarize(rating = mean(rating, na.rm = TRUE)) %>%
  arrange(desc(rating))

# Show the first six rows (both columns) as a pipe-format table.
country_n %>%
  select(1:2) %>%
  head() %>%
  knitr::kable("pipe")
country_of_bean_origin rating
Solomon Islands 3.450000
Congo 3.318182
Cuba 3.291667
Vietnam 3.287671
Papua New Guinea 3.280000
Madagascar 3.266949

Solomon Islands (3.45), Congo (3.32), and Cuba (3.29) are the countries of origin that have the highest average rating after filtering out countries with fewer than 10 chocolate bar reviews.

#7.

library(dplyr)
library(forcats)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Keep the origins with at least 50 rows and bin the cocoa percentage:
#   "1": below 60, "2": 60 to <70, "3": 70 to <90, "4": 90 and above.
# The grouping is dropped once the size filter has been applied so later
# steps operate on the whole table.
country <- chocolate %>%
  group_by(country_of_bean_origin) %>%
  mutate(n = n()) %>%
  filter(n >= 50) %>%
  ungroup() %>%
  mutate(
    # cocoa_percent is text with a "%" sign; strip it before converting.
    c_percent = as.numeric(str_remove(cocoa_percent, "%")),
    percent = case_when(
      c_percent < 60 ~ "1",
      c_percent >= 60 & c_percent < 70 ~ "2",
      c_percent >= 70 & c_percent < 90 ~ "3",
      c_percent >= 90 ~ "4"
    ),
    # Fix the level order explicitly so the boxes always run from the lowest
    # to the highest cocoa-percentage group.
    percent = factor(percent, levels = c("1", "2", "3", "4"))
  )

# One boxplot of rating by cocoa-percentage group per origin, titled with the
# origin's name, in alphabetical order. Deriving the origins from the data
# keeps the plots in step with the >= 50 filter above.
for (origin in sort(unique(country$country_of_bean_origin))) {
  origin_plot <- country %>%
    filter(country_of_bean_origin == .env$origin) %>%
    ggplot(aes(x = percent, y = rating)) +
    geom_boxplot() +
    ggtitle(origin)
  # Plots created inside a loop are only drawn when printed explicitly.
  print(origin_plot)
}

Part 2

## # A tibble: 5 × 2
##   continent distinct_points
##   <chr>               <int>
## 1 Africa                  1
## 2 Americas                1
## 3 Asia                    1
## 4 Blend                   1
## 5 Oceania                 1

Part 3

library(tidyverse)
library(here)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Flag each bar (1 = present, 0 = absent) for six ingredient codes found in
# `ingredients` and six words found in `most_memorable_characteristics`, then
# average every flag within each review year. The result has one row per
# review_date and one column per feature.
chocolate <- chocolate %>%
  mutate(
    # ingredient flags
    beans = as.numeric(grepl("B", ingredients)),
    # "S(?!a)" matches an "S" that is not followed by "a", so the salt code
    # "Sa" no longer counts as sugar on its own.
    # NOTE(review): any other code that starts with "S" still counts as
    # sugar here - confirm against the data dictionary.
    sugar = as.numeric(grepl("S(?!a)", ingredients, perl = TRUE)),
    cocoa_butter = as.numeric(grepl("C", ingredients)),
    vanilla = as.numeric(grepl("V", ingredients)),
    lecithin = as.numeric(grepl("L", ingredients)),
    salt = as.numeric(grepl("Sa", ingredients)),
    # characteristic flags
    char_cocoa = as.numeric(grepl("cocoa", most_memorable_characteristics)),
    char_sweet = as.numeric(grepl("sweet", most_memorable_characteristics)),
    char_nutty = as.numeric(grepl("nutty", most_memorable_characteristics)),
    char_creamy = as.numeric(grepl("creamy", most_memorable_characteristics)),
    char_roasty = as.numeric(grepl("roasty", most_memorable_characteristics)),
    char_earthy = as.numeric(grepl("earthy", most_memorable_characteristics))
  ) %>%
  group_by(review_date) %>%
  # The twelve flag columns were appended in order, so beans:char_earthy
  # selects exactly them; summarize() drops every other column.
  summarize(across(beans:char_earthy, function(x) mean(x, na.rm = TRUE)))

# Reshape to long form: one row per (review_date, feature) pair.
chocolate_long <- chocolate %>%
  pivot_longer(-review_date, names_to = "features", values_to = "mean_score")

# n = Inf prints every row without hard-coding the row count.
print(chocolate_long, n = Inf)
## # A tibble: 192 × 3
##     review_date features     mean_score
##           <dbl> <chr>             <dbl>
##   1        2006 beans           0.968  
##   2        2006 sugar           0.968  
##   3        2006 cocoa_butter    0.903  
##   4        2006 vanilla         0.694  
##   5        2006 lecithin        0.694  
##   6        2006 salt            0      
##   7        2006 char_cocoa      0.210  
##   8        2006 char_sweet      0.161  
##   9        2006 char_nutty      0.0323 
##  10        2006 char_creamy     0.242  
##  11        2006 char_roasty     0.0484 
##  12        2006 char_earthy     0.0645 
##  13        2007 beans           0.945  
##  14        2007 sugar           0.945  
##  15        2007 cocoa_butter    0.767  
##  16        2007 vanilla         0.548  
##  17        2007 lecithin        0.384  
##  18        2007 salt            0      
##  19        2007 char_cocoa      0.342  
##  20        2007 char_sweet      0.0959 
##  21        2007 char_nutty      0.0411 
##  22        2007 char_creamy     0.233  
##  23        2007 char_roasty     0.0137 
##  24        2007 char_earthy     0.0685 
##  25        2008 beans           0.913  
##  26        2008 sugar           0.902  
##  27        2008 cocoa_butter    0.75   
##  28        2008 vanilla         0.359  
##  29        2008 lecithin        0.511  
##  30        2008 salt            0      
##  31        2008 char_cocoa      0.109  
##  32        2008 char_sweet      0.130  
##  33        2008 char_nutty      0.152  
##  34        2008 char_creamy     0.0978 
##  35        2008 char_roasty     0.0435 
##  36        2008 char_earthy     0.0435 
##  37        2009 beans           0.919  
##  38        2009 sugar           0.919  
##  39        2009 cocoa_butter    0.772  
##  40        2009 vanilla         0.325  
##  41        2009 lecithin        0.341  
##  42        2009 salt            0      
##  43        2009 char_cocoa      0.146  
##  44        2009 char_sweet      0.154  
##  45        2009 char_nutty      0.154  
##  46        2009 char_creamy     0.0894 
##  47        2009 char_roasty     0.0813 
##  48        2009 char_earthy     0.0732 
##  49        2010 beans           0.855  
##  50        2010 sugar           0.855  
##  51        2010 cocoa_butter    0.709  
##  52        2010 vanilla         0.227  
##  53        2010 lecithin        0.391  
##  54        2010 salt            0.00909
##  55        2010 char_cocoa      0.218  
##  56        2010 char_sweet      0.1    
##  57        2010 char_nutty      0.145  
##  58        2010 char_creamy     0.0909 
##  59        2010 char_roasty     0.0364 
##  60        2010 char_earthy     0.0727 
##  61        2011 beans           0.939  
##  62        2011 sugar           0.939  
##  63        2011 cocoa_butter    0.693  
##  64        2011 vanilla         0.160  
##  65        2011 lecithin        0.160  
##  66        2011 salt            0.0491 
##  67        2011 char_cocoa      0.172  
##  68        2011 char_sweet      0.110  
##  69        2011 char_nutty      0.117  
##  70        2011 char_creamy     0.129  
##  71        2011 char_roasty     0.0736 
##  72        2011 char_earthy     0.0613 
##  73        2012 beans           0.928  
##  74        2012 sugar           0.928  
##  75        2012 cocoa_butter    0.675  
##  76        2012 vanilla         0.186  
##  77        2012 lecithin        0.124  
##  78        2012 salt            0.0722 
##  79        2012 char_cocoa      0.0876 
##  80        2012 char_sweet      0.139  
##  81        2012 char_nutty      0.103  
##  82        2012 char_creamy     0.0722 
##  83        2012 char_roasty     0.0619 
##  84        2012 char_earthy     0.0464 
##  85        2013 beans           0.967  
##  86        2013 sugar           0.956  
##  87        2013 cocoa_butter    0.776  
##  88        2013 vanilla         0.208  
##  89        2013 lecithin        0.295  
##  90        2013 salt            0.0164 
##  91        2013 char_cocoa      0.175  
##  92        2013 char_sweet      0.126  
##  93        2013 char_nutty      0.115  
##  94        2013 char_creamy     0.0710 
##  95        2013 char_roasty     0.109  
##  96        2013 char_earthy     0.0492 
##  97        2014 beans           0.984  
##  98        2014 sugar           0.984  
##  99        2014 cocoa_butter    0.644  
## 100        2014 vanilla         0.0688 
## 101        2014 lecithin        0.121  
## 102        2014 salt            0.0324 
## 103        2014 char_cocoa      0.0607 
## 104        2014 char_sweet      0.0972 
## 105        2014 char_nutty      0.158  
## 106        2014 char_creamy     0.0486 
## 107        2014 char_roasty     0.0972 
## 108        2014 char_earthy     0.101  
## 109        2015 beans           0.986  
## 110        2015 sugar           0.979  
## 111        2015 cocoa_butter    0.546  
## 112        2015 vanilla         0.0599 
## 113        2015 lecithin        0.120  
## 114        2015 salt            0      
## 115        2015 char_cocoa      0.127  
## 116        2015 char_sweet      0.106  
## 117        2015 char_nutty      0.109  
## 118        2015 char_creamy     0.0423 
## 119        2015 char_roasty     0.123  
## 120        2015 char_earthy     0.0810 
## 121        2016 beans           0.982  
## 122        2016 sugar           0.977  
## 123        2016 cocoa_butter    0.594  
## 124        2016 vanilla         0.0507 
## 125        2016 lecithin        0.106  
## 126        2016 salt            0.00922
## 127        2016 char_cocoa      0.0922 
## 128        2016 char_sweet      0.171  
## 129        2016 char_nutty      0.157  
## 130        2016 char_creamy     0.0553 
## 131        2016 char_roasty     0.101  
## 132        2016 char_earthy     0.111  
## 133        2017 beans           0.981  
## 134        2017 sugar           0.981  
## 135        2017 cocoa_butter    0.562  
## 136        2017 vanilla         0.0286 
## 137        2017 lecithin        0.133  
## 138        2017 salt            0.00952
## 139        2017 char_cocoa      0.133  
## 140        2017 char_sweet      0.0952 
## 141        2017 char_nutty      0.0667 
## 142        2017 char_creamy     0.0952 
## 143        2017 char_roasty     0.124  
## 144        2017 char_earthy     0.124  
## 145        2018 beans           0.987  
## 146        2018 sugar           0.987  
## 147        2018 cocoa_butter    0.596  
## 148        2018 vanilla         0.0614 
## 149        2018 lecithin        0.132  
## 150        2018 salt            0      
## 151        2018 char_cocoa      0.180  
## 152        2018 char_sweet      0.118  
## 153        2018 char_nutty      0.0789 
## 154        2018 char_creamy     0.0439 
## 155        2018 char_roasty     0.110  
## 156        2018 char_earthy     0.123  
## 157        2019 beans           1      
## 158        2019 sugar           1      
## 159        2019 cocoa_butter    0.679  
## 160        2019 vanilla         0.0259 
## 161        2019 lecithin        0.202  
## 162        2019 salt            0      
## 163        2019 char_cocoa      0.259  
## 164        2019 char_sweet      0.145  
## 165        2019 char_nutty      0.0725 
## 166        2019 char_creamy     0.0881 
## 167        2019 char_roasty     0.109  
## 168        2019 char_earthy     0.0415 
## 169        2020 beans           1      
## 170        2020 sugar           1      
## 171        2020 cocoa_butter    0.568  
## 172        2020 vanilla         0.0370 
## 173        2020 lecithin        0.0247 
## 174        2020 salt            0      
## 175        2020 char_cocoa      0.284  
## 176        2020 char_sweet      0.160  
## 177        2020 char_nutty      0.0494 
## 178        2020 char_creamy     0.0370 
## 179        2020 char_roasty     0.0988 
## 180        2020 char_earthy     0.0988 
## 181        2021 beans           1      
## 182        2021 sugar           0.994  
## 183        2021 cocoa_butter    0.646  
## 184        2021 vanilla         0.0114 
## 185        2021 lecithin        0.0800 
## 186        2021 salt            0      
## 187        2021 char_cocoa      0.297  
## 188        2021 char_sweet      0.126  
## 189        2021 char_nutty      0.0971 
## 190        2021 char_creamy     0.0171 
## 191        2021 char_roasty     0.0743 
## 192        2021 char_earthy     0.0686

Long dataset of mean scores for each ingredient and main characteristic by review year

Part 4

library(tidyverse)
library(here)
library(ggplot2)

# Tidy the feature labels: strip the "char_" prefix and write
# "cocoa_butter" as "cocoa butter".
chocolate_long <- chocolate_long %>%
  mutate(
    features = str_remove(features, "char_"),
    features = if_else(features == "cocoa_butter", "cocoa butter", features)
  )

# Reorder the feature levels: move the listed ingredient levels to directly
# after the first level, ahead of the remaining levels.
ingredient_levels <- c("cocoa butter", "lecithin", "sugar", "salt", "vanilla")
chocolate_long <- chocolate_long %>%
  mutate(features = fct_relevel(features, ingredient_levels, after = 1))

# Scatter of mean_score by review year with one smoothed trend line per
# feature; points and lines share the same colour mapping.
chocolate_long %>%
  ggplot(aes(review_date, mean_score)) +
  geom_point(aes(color = features)) +
  geom_smooth(aes(group = features, color = features), se = FALSE) +
  labs(
    title = "Trends in average chocolate bar ratings by ingredients and\nkey characteristics from 2006 to 2021",
    caption = "Meriam Berka",
    x = "Year",
    y = "Average rating"
  ) +
  scale_x_continuous(
    breaks = c(2008, 2012, 2016, 2020),
    minor_breaks = c(2006, 2010, 2014, 2018, 2022)
  ) +
  theme_gray()

Part 5

library(tidyverse)
library(here)
library(ggplot2)

# Deliberately poor version of the trend plot; its shortcomings are listed in
# the bullet points that follow. Points are coloured by review year and a
# single smoothed line is fitted across all features.
chocolate_long %>%
  ggplot(aes(review_date, mean_score)) +
  geom_point(aes(color = review_date)) +
  geom_smooth(color = "burlywood4") +
  labs(
    title = "Ave chocolate bar ratings by ingredients and key characteristics from 2006 to 2021",
    caption = "Meriam Berka doesn't endorse this",
    x = "date123%",
    y = NULL
  ) +
  scale_x_continuous(breaks = c(2006, 2017, 2018, 2021)) +
  theme_dark()

  • Instead of making trends in features clear, color is used to highlight years, which are already obvious from the x-axis.
  • Colors are inconsistent, compared to the original plot where the same colors were used for points and trends lines, and are low contrast.
  • Overall trend line doesn’t communicate anything meaningful, it’s an average of all points over the years.
  • Title is not wrapped to the plot.
  • Y-label is missing, x-label is uninformative, and x-axis breaks are inconsistent making it very difficult to know what’s even being plotted.
  • The plot doesn’t actually show what the title says it should: because the features are not identified in any way (e.g. by color or shape of points), trends cannot be discerned.

Part 6

library(tidyverse)
library(here)
library(ggplot2)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Violin plot of the rating distribution for each review year, with a line
# drawn at the median (0.5 quantile) of every violin.
chocolate %>%
  ggplot(aes(x = as.factor(review_date), y = rating)) +
  geom_violin(draw_quantiles = c(0.5), color = "#FF99FF", fill = "#FFCCFF") +
  ggtitle("Distribution of chocolate bar ratings from 0 to 4 by year of\nreview, 2006-2021") +
  # The stray "')" that trailed the URL in the caption has been removed.
  labs(caption = "Data from Tidy Tuesday 01-11-2022:\nR4DS Online Learning Community (2023). Tidy Tuesday: A weekly social data project.\nhttps://github.com/rfordatascience/tidytuesday.") +
  xlab("Year of review") +
  ylab("Rating (0 to 4)") +
  theme(
    # Title: serif, bold, with extra space below it.
    plot.title = element_text(
      margin = margin(0.01, 0, 20, 0),
      family = "serif",
      face = "bold"
    ),
    # Axis tick labels: white, with the year labels rotated.
    axis.text.x = element_text(
      angle = 270,
      vjust = 0.5,
      hjust = 1,
      color = "white"
    ),
    axis.text.y = element_text(color = "white"),
    # Extra spacing between the axis titles and the axes.
    axis.title.x = element_text(margin = margin(12, 0, 25, 0)),
    axis.title.y = element_text(margin = margin(10, 10, 10, 5)),
    # Caption left-aligned.
    plot.caption = element_text(hjust = 0, color = "white"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0 and raised a warning.
    panel.grid.major = element_line(
      linewidth = 0.5,
      linetype = "solid",
      colour = "lavender"
    ),
    panel.grid.minor = element_line(
      linewidth = 0.25,
      linetype = "solid",
      colour = "lavender"
    ),
    # White text on a dark page with a lighter grey plotting panel.
    text = element_text(color = "white"),
    plot.background = element_rect(fill = "gray7"),
    panel.background = element_rect(fill = "gray60"),
    plot.margin = unit(c(0.5, 0.75, 0.5, 0.75), "inches")
  )
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

  • Added an informative title so it’s clear what is actually being represented by this graph and to aid in interpretation.
  • Added y-axis label and x-axis labels and rotated x-axis labels so they’re less cluttered and easier to read.
  • Added caption with data citation to properly attribute the data used.
  • Added line at median of each plot to make it easier to visually compare average scores across years.
  • Changed line to lavender, text to white, background to dark gray, and panel to light grey to increase visual interest while maintaining color contrast.
  • Increased text spacing, including adding a gap between axis labels and axes and title and plot. The caption was also moved to the bottom of the page since it’s extra information and not a part of the actual plot.
  • Changed title font to make it cuter!

setup

Part 1

#1. Histogram of ratings

# Attach the tidyverse (ggplot2, dplyr, ...) and the project-relative path
# helper used to locate the data file.
library(tidyverse)
library(here)

# Read the chocolate data from data/chocolate.RDS.
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Histogram of the `rating` column, drawn with 15 bins.
chocolate %>%
  ggplot(aes(x = rating)) +
  geom_histogram(bins = 15) +
  ggtitle("Histogram of chocolate bar ratings")

Increasing the number of bins allows more granularity of data, better showing the shape of the distribution. However, if we increase the number of bins too much we start to have gaps in our data because the number of bins exceeds the number of potential scores. I selected a 15 bin histogram because it nicely shows the shape of the distribution but doesn’t have the gapping a 20+ bin histogram does.

#2. Number of ratings by country of bean origin

library(dplyr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Tally the rows for each country of bean origin, largest count first.
# The result is deliberately still called `count`; R resolves the call to
# dplyr's count() function even though a data object shares the name.
count <- count(chocolate, country_of_bean_origin, sort = TRUE)

# n = Inf prints every row, so the full table is shown without hard-coding
# how many countries the data happens to contain.
print(count, n = Inf)
## # A tibble: 62 × 2
##    country_of_bean_origin     n
##    <chr>                  <int>
##  1 Venezuela                253
##  2 Peru                     244
##  3 Dominican Republic       226
##  4 Ecuador                  219
##  5 Madagascar               177
##  6 Blend                    156
##  7 Nicaragua                100
##  8 Bolivia                   80
##  9 Colombia                  79
## 10 Tanzania                  79
## 11 Brazil                    78
## 12 Belize                    76
## 13 Vietnam                   73
## 14 Guatemala                 62
## 15 Mexico                    55
## 16 Papua New Guinea          50
## 17 Costa Rica                43
## 18 Trinidad                  42
## 19 Ghana                     41
## 20 India                     35
## 21 U.S.A.                    33
## 22 Haiti                     30
## 23 Honduras                  25
## 24 Jamaica                   24
## 25 Philippines               24
## 26 Indonesia                 20
## 27 Grenada                   19
## 28 Uganda                    19
## 29 Fiji                      16
## 30 Sao Tome                  14
## 31 Vanuatu                   13
## 32 Cuba                      12
## 33 Congo                     11
## 34 Solomon Islands           10
## 35 St. Lucia                 10
## 36 Panama                     9
## 37 Malaysia                   8
## 38 Ivory Coast                7
## 39 Puerto Rico                7
## 40 El Salvador                6
## 41 Thailand                   5
## 42 Sierra Leone               4
## 43 Australia                  3
## 44 Cameroon                   3
## 45 Liberia                    3
## 46 Nigeria                    3
## 47 Samoa                      3
## 48 Togo                       3
## 49 Sao Tome & Principe        2
## 50 Sri Lanka                  2
## 51 Taiwan                     2
## 52 Tobago                     2
## 53 Burma                      1
## 54 China                      1
## 55 DR Congo                   1
## 56 Gabon                      1
## 57 Martinique                 1
## 58 Principe                   1
## 59 St.Vincent-Grenadines      1
## 60 Sulawesi                   1
## 61 Sumatra                    1
## 62 Suriname                   1

The number of ratings for each country of bean origin is shown in the table above.

#3. Average ratings for Ecuadorian beans

library(dplyr)
library(knitr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# One-row summary of the ratings for bars made from Ecuadorian beans:
#   mean  - average rating (missing ratings ignored)
#   sd    - standard deviation of the ratings (missing ratings ignored)
#   total - number of non-missing ratings
# summarise() collapses the filtered rows directly, so there is no need to
# add the statistics to every row and then keep only the first one.
ecud <- chocolate %>%
  filter(country_of_bean_origin == "Ecuador") %>%
  summarise(
    mean = mean(rating, na.rm = TRUE),
    sd = sd(rating, na.rm = TRUE),
    total = sum(!is.na(rating))
  )

# Render the summary as a pipe-format (markdown) table.
knitr::kable(ecud, format = "pipe")
mean sd total
3.164384 0.5122678 219

Average ratings, standard deviation of ratings and total number of ratings for Ecuadorian chocolates

#4.Best manufacturer

library(dplyr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Keep only the bars made from Ecuadorian beans.
ecud <- filter(chocolate, country_of_bean_origin == "Ecuador")

# Average rating per manufacturer, sorted from highest to lowest so the
# best-rated manufacturer(s) appear in the first rows of the printed table.
# Without the sort the rows come out in manufacturer-name order, and the
# first row is not the top-rated manufacturer.
company <- ecud %>%
  group_by(company_manufacturer) %>%
  summarize(rating = mean(rating, na.rm = TRUE)) %>%
  arrange(desc(rating))
company
## # A tibble: 136 × 2
##    company_manufacturer  rating
##    <chr>                  <dbl>
##  1 A. Morin                3.75
##  2 Aequare (Gianduja)      2.88
##  3 Alexandre               3.5 
##  4 Altus aka Cao Artisan   2.75
##  5 Amano                   4   
##  6 Amatller (Simon Coll)   2.75
##  7 Amedei                  3   
##  8 Ara                     2.75
##  9 Arete                   3.44
## 10 Askinosie               3   
## # ℹ 126 more rows

Of the Ecuadorian bean chocolates, A. Morin is the manufacturer with the highest average rating.

#5.Average rating by country of origin

chocolate <- readRDS(here("data", "chocolate.RDS"))

# Mean rating for each country of bean origin, best-rated first.
country <- chocolate %>%
  group_by(country_of_bean_origin) %>%
  summarize(rating = mean(rating, na.rm = TRUE)) %>%
  arrange(desc(rating))

# Show the first six rows (both columns) as a pipe-format table.
country %>%
  select(1:2) %>%
  head() %>%
  knitr::kable("pipe")
country_of_bean_origin rating
Tobago 3.625000
China 3.500000
Sao Tome & Principe 3.500000
Solomon Islands 3.450000
Congo 3.318182
Thailand 3.300000

Tobago (3.63), China (3.50), and Sao Tome & Principe (3.50) are the countries of origin that have the highest average ratings.

#6.Average rating by country of origin in countries with 10+ ratings

library(dplyr)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Group the bars by country of bean origin.
country <- chocolate %>% group_by(country_of_bean_origin)

# Keep only the origins with at least 10 rows, then compute each remaining
# origin's mean rating and order the result from highest to lowest.
country_n <- country %>%
  filter(n() >= 10) %>%
  summarize(rating = mean(rating, na.rm = TRUE)) %>%
  arrange(desc(rating))

# Show the first six rows (both columns) as a pipe-format table.
country_n %>%
  select(1:2) %>%
  head() %>%
  knitr::kable("pipe")
country_of_bean_origin rating
Solomon Islands 3.450000
Congo 3.318182
Cuba 3.291667
Vietnam 3.287671
Papua New Guinea 3.280000
Madagascar 3.266949

Solomon Islands (3.45), Congo (3.32), and Cuba (3.29) are the countries of origin that have the highest average rating after filtering out countries with fewer than 10 chocolate bar reviews.

#7.

library(dplyr)
library(forcats)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Keep only countries of bean origin with at least 50 reviews, then bin every
# bar by its cocoa percentage:
#   "1": < 60%    "2": 60% to < 70%    "3": 70% to < 90%    "4": >= 90%
#
# The data are ungrouped before the factor is built. Releveling inside a
# grouped mutate() builds the levels separately per country and then merges
# them in order of first appearance, so the bin order on the x-axis could
# depend on which bins the first country happens to contain. Explicit levels
# on the ungrouped data guarantee the order 1, 2, 3, 4 in every plot.
country <- chocolate %>%
  group_by(country_of_bean_origin) %>%
  mutate(n = n()) %>%
  filter(n >= 50) %>%
  ungroup() %>%
  mutate(
    c_percent = as.numeric(str_remove(cocoa_percent, "%")),
    percent = case_when(
      c_percent < 60 ~ "1",
      c_percent >= 60 & c_percent < 70 ~ "2",
      c_percent >= 70 & c_percent < 90 ~ "3",
      c_percent >= 90 ~ "4"
    ),
    percent = factor(percent, levels = c("1", "2", "3", "4"))
  )

# One box plot of rating by cocoa-percentage bin per country, in alphabetical
# order of country name. ggplot() + geom_boxplot() replaces qplot(), which is
# deprecated as of ggplot2 3.4.0. Plots built inside a loop are not
# auto-printed, hence the explicit print().
for (origin in sort(unique(country$country_of_bean_origin))) {
  origin_plot <- country %>%
    filter(country_of_bean_origin == origin) %>%
    ggplot(aes(x = percent, y = rating)) +
    geom_boxplot() +
    ggtitle(origin)
  print(origin_plot)
}

Part 2

## # A tibble: 5 × 2
##   continent distinct_points
##   <chr>               <int>
## 1 Africa                  1
## 2 Americas                1
## 3 Asia                    1
## 4 Blend                   1
## 5 Oceania                 1

Part 3

library(tidyverse)
library(here)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Regular expressions (PCRE) that detect each ingredient code in `ingredients`.
# Sugar needs a negative look-ahead: a bare "S" would also match "S*"
# (sweetener other than sugar) and "Sa" (salt) and so over-count sugar.
# NOTE(review): the table printed below this chunk was generated before the
# sugar pattern was tightened; re-knit the document to refresh its sugar rows.
ingredient_patterns <- c(
  beans = "B",
  sugar = "S(?![a*])",
  cocoa_butter = "C",
  vanilla = "V",
  lecithin = "L",
  salt = "Sa"
)

# Words searched for in `most_memorable_characteristics`; each becomes a
# `char_<word>` column.
characteristic_words <- c("cocoa", "sweet", "nutty", "creamy", "roasty", "earthy")

# add ingredient columns (1 = ingredient listed, 0 = not listed or missing)
for (ingredient in names(ingredient_patterns)) {
  chocolate[[ingredient]] <- as.numeric(
    grepl(ingredient_patterns[[ingredient]], chocolate$ingredients, perl = TRUE)
  )
}

# add characteristic columns (1 = word present, 0 = absent or missing)
for (word in characteristic_words) {
  chocolate[[paste0("char_", word)]] <- as.numeric(
    grepl(word, chocolate$most_memorable_characteristics)
  )
}

# group by year and calculate means; only review_date and the twelve indicator
# columns survive the summary, in the order they were created above
feature_cols <- c(names(ingredient_patterns), paste0("char_", characteristic_words))
chocolate <- chocolate %>%
  group_by(review_date) %>%
  summarize(across(all_of(feature_cols), function(x) mean(x, na.rm = TRUE)))

# one row per (year, feature) pair
chocolate_long <- chocolate %>%
  pivot_longer(-review_date, names_to = "features", values_to = "mean_score")
print(chocolate_long, n = 192)
## # A tibble: 192 × 3
##     review_date features     mean_score
##           <dbl> <chr>             <dbl>
##   1        2006 beans           0.968  
##   2        2006 sugar           0.968  
##   3        2006 cocoa_butter    0.903  
##   4        2006 vanilla         0.694  
##   5        2006 lecithin        0.694  
##   6        2006 salt            0      
##   7        2006 char_cocoa      0.210  
##   8        2006 char_sweet      0.161  
##   9        2006 char_nutty      0.0323 
##  10        2006 char_creamy     0.242  
##  11        2006 char_roasty     0.0484 
##  12        2006 char_earthy     0.0645 
##  13        2007 beans           0.945  
##  14        2007 sugar           0.945  
##  15        2007 cocoa_butter    0.767  
##  16        2007 vanilla         0.548  
##  17        2007 lecithin        0.384  
##  18        2007 salt            0      
##  19        2007 char_cocoa      0.342  
##  20        2007 char_sweet      0.0959 
##  21        2007 char_nutty      0.0411 
##  22        2007 char_creamy     0.233  
##  23        2007 char_roasty     0.0137 
##  24        2007 char_earthy     0.0685 
##  25        2008 beans           0.913  
##  26        2008 sugar           0.902  
##  27        2008 cocoa_butter    0.75   
##  28        2008 vanilla         0.359  
##  29        2008 lecithin        0.511  
##  30        2008 salt            0      
##  31        2008 char_cocoa      0.109  
##  32        2008 char_sweet      0.130  
##  33        2008 char_nutty      0.152  
##  34        2008 char_creamy     0.0978 
##  35        2008 char_roasty     0.0435 
##  36        2008 char_earthy     0.0435 
##  37        2009 beans           0.919  
##  38        2009 sugar           0.919  
##  39        2009 cocoa_butter    0.772  
##  40        2009 vanilla         0.325  
##  41        2009 lecithin        0.341  
##  42        2009 salt            0      
##  43        2009 char_cocoa      0.146  
##  44        2009 char_sweet      0.154  
##  45        2009 char_nutty      0.154  
##  46        2009 char_creamy     0.0894 
##  47        2009 char_roasty     0.0813 
##  48        2009 char_earthy     0.0732 
##  49        2010 beans           0.855  
##  50        2010 sugar           0.855  
##  51        2010 cocoa_butter    0.709  
##  52        2010 vanilla         0.227  
##  53        2010 lecithin        0.391  
##  54        2010 salt            0.00909
##  55        2010 char_cocoa      0.218  
##  56        2010 char_sweet      0.1    
##  57        2010 char_nutty      0.145  
##  58        2010 char_creamy     0.0909 
##  59        2010 char_roasty     0.0364 
##  60        2010 char_earthy     0.0727 
##  61        2011 beans           0.939  
##  62        2011 sugar           0.939  
##  63        2011 cocoa_butter    0.693  
##  64        2011 vanilla         0.160  
##  65        2011 lecithin        0.160  
##  66        2011 salt            0.0491 
##  67        2011 char_cocoa      0.172  
##  68        2011 char_sweet      0.110  
##  69        2011 char_nutty      0.117  
##  70        2011 char_creamy     0.129  
##  71        2011 char_roasty     0.0736 
##  72        2011 char_earthy     0.0613 
##  73        2012 beans           0.928  
##  74        2012 sugar           0.928  
##  75        2012 cocoa_butter    0.675  
##  76        2012 vanilla         0.186  
##  77        2012 lecithin        0.124  
##  78        2012 salt            0.0722 
##  79        2012 char_cocoa      0.0876 
##  80        2012 char_sweet      0.139  
##  81        2012 char_nutty      0.103  
##  82        2012 char_creamy     0.0722 
##  83        2012 char_roasty     0.0619 
##  84        2012 char_earthy     0.0464 
##  85        2013 beans           0.967  
##  86        2013 sugar           0.956  
##  87        2013 cocoa_butter    0.776  
##  88        2013 vanilla         0.208  
##  89        2013 lecithin        0.295  
##  90        2013 salt            0.0164 
##  91        2013 char_cocoa      0.175  
##  92        2013 char_sweet      0.126  
##  93        2013 char_nutty      0.115  
##  94        2013 char_creamy     0.0710 
##  95        2013 char_roasty     0.109  
##  96        2013 char_earthy     0.0492 
##  97        2014 beans           0.984  
##  98        2014 sugar           0.984  
##  99        2014 cocoa_butter    0.644  
## 100        2014 vanilla         0.0688 
## 101        2014 lecithin        0.121  
## 102        2014 salt            0.0324 
## 103        2014 char_cocoa      0.0607 
## 104        2014 char_sweet      0.0972 
## 105        2014 char_nutty      0.158  
## 106        2014 char_creamy     0.0486 
## 107        2014 char_roasty     0.0972 
## 108        2014 char_earthy     0.101  
## 109        2015 beans           0.986  
## 110        2015 sugar           0.979  
## 111        2015 cocoa_butter    0.546  
## 112        2015 vanilla         0.0599 
## 113        2015 lecithin        0.120  
## 114        2015 salt            0      
## 115        2015 char_cocoa      0.127  
## 116        2015 char_sweet      0.106  
## 117        2015 char_nutty      0.109  
## 118        2015 char_creamy     0.0423 
## 119        2015 char_roasty     0.123  
## 120        2015 char_earthy     0.0810 
## 121        2016 beans           0.982  
## 122        2016 sugar           0.977  
## 123        2016 cocoa_butter    0.594  
## 124        2016 vanilla         0.0507 
## 125        2016 lecithin        0.106  
## 126        2016 salt            0.00922
## 127        2016 char_cocoa      0.0922 
## 128        2016 char_sweet      0.171  
## 129        2016 char_nutty      0.157  
## 130        2016 char_creamy     0.0553 
## 131        2016 char_roasty     0.101  
## 132        2016 char_earthy     0.111  
## 133        2017 beans           0.981  
## 134        2017 sugar           0.981  
## 135        2017 cocoa_butter    0.562  
## 136        2017 vanilla         0.0286 
## 137        2017 lecithin        0.133  
## 138        2017 salt            0.00952
## 139        2017 char_cocoa      0.133  
## 140        2017 char_sweet      0.0952 
## 141        2017 char_nutty      0.0667 
## 142        2017 char_creamy     0.0952 
## 143        2017 char_roasty     0.124  
## 144        2017 char_earthy     0.124  
## 145        2018 beans           0.987  
## 146        2018 sugar           0.987  
## 147        2018 cocoa_butter    0.596  
## 148        2018 vanilla         0.0614 
## 149        2018 lecithin        0.132  
## 150        2018 salt            0      
## 151        2018 char_cocoa      0.180  
## 152        2018 char_sweet      0.118  
## 153        2018 char_nutty      0.0789 
## 154        2018 char_creamy     0.0439 
## 155        2018 char_roasty     0.110  
## 156        2018 char_earthy     0.123  
## 157        2019 beans           1      
## 158        2019 sugar           1      
## 159        2019 cocoa_butter    0.679  
## 160        2019 vanilla         0.0259 
## 161        2019 lecithin        0.202  
## 162        2019 salt            0      
## 163        2019 char_cocoa      0.259  
## 164        2019 char_sweet      0.145  
## 165        2019 char_nutty      0.0725 
## 166        2019 char_creamy     0.0881 
## 167        2019 char_roasty     0.109  
## 168        2019 char_earthy     0.0415 
## 169        2020 beans           1      
## 170        2020 sugar           1      
## 171        2020 cocoa_butter    0.568  
## 172        2020 vanilla         0.0370 
## 173        2020 lecithin        0.0247 
## 174        2020 salt            0      
## 175        2020 char_cocoa      0.284  
## 176        2020 char_sweet      0.160  
## 177        2020 char_nutty      0.0494 
## 178        2020 char_creamy     0.0370 
## 179        2020 char_roasty     0.0988 
## 180        2020 char_earthy     0.0988 
## 181        2021 beans           1      
## 182        2021 sugar           0.994  
## 183        2021 cocoa_butter    0.646  
## 184        2021 vanilla         0.0114 
## 185        2021 lecithin        0.0800 
## 186        2021 salt            0      
## 187        2021 char_cocoa      0.297  
## 188        2021 char_sweet      0.126  
## 189        2021 char_nutty      0.0971 
## 190        2021 char_creamy     0.0171 
## 191        2021 char_roasty     0.0743 
## 192        2021 char_earthy     0.0686

Long dataset of mean scores for each ingredient and main characteristic by review year

Part 4

library(tidyverse)
library(here)
library(ggplot2)

# Tidy the feature labels: drop the "char_" prefix and spell out cocoa butter.
chocolate_long <- chocolate_long %>%
  mutate(
    features = str_remove(features, "char_"),
    features = if_else(features == "cocoa_butter", "cocoa butter", features)
  )

# Order the features so the ingredients come first, then the flavours.
chocolate_long <- chocolate_long %>%
  mutate(
    features = fct_relevel(
      features,
      "cocoa butter", "lecithin", "sugar", "salt", "vanilla",
      after = 1
    )
  )

# Points plus one smoothed trend line per feature, sharing the same colour.
ggplot(chocolate_long, aes(review_date, mean_score)) +
  geom_point(aes(color = features)) +
  geom_smooth(aes(group = features, color = features), se = FALSE) +
  labs(
    title = "Trends in average chocolate bar ratings by ingredients and\nkey characteristics from 2006 to 2021",
    caption = "Meriam Berka",
    x = "Year",
    y = "Average rating"
  ) +
  scale_x_continuous(
    breaks = c(2008, 2012, 2016, 2020),
    minor_breaks = c(2006, 2010, 2014, 2018, 2022)
  ) +
  theme_gray()

Part 5

library(tidyverse)
library(here)
library(ggplot2)

# Deliberately poor version of the trend plot; its flaws are listed in the
# bullet points that follow.
ggplot(chocolate_long, aes(review_date, mean_score)) +
  geom_point(aes(color = review_date)) +
  geom_smooth(color = "burlywood4") +
  labs(
    title = "Ave chocolate bar ratings by ingredients and key characteristics from 2006 to 2021",
    caption = "Meriam Berka doesn't endorse this",
    x = "date123%",
    y = NULL
  ) +
  scale_x_continuous(breaks = c(2006, 2017, 2018, 2021)) +
  theme_dark()

  • Instead of using color to make the trends in each feature clear, the plot uses it to highlight years, which are already obvious from the x-axis.
  • Colors are inconsistent, compared to the original plot where the same colors were used for points and trend lines, and are low contrast.
  • Overall trend line doesn’t communicate anything meaningful, it’s an average of all points over the years.
  • Title is not wrapped to the plot.
  • Y-label is missing, x-label is uninformative, and x-axis breaks are inconsistent making it very difficult to know what’s even being plotted.
  • The plot doesn’t actually show what the title says it should: since the features are not identified in any way (e.g., by color or shape of points), trends cannot be discerned.

Part 6

library(tidyverse)
library(here)
library(ggplot2)
chocolate <- readRDS(here("data", "chocolate.RDS"))

# Violin plot of ratings for each review year, with a line at the median.
# Grid lines use `linewidth`; the `size` argument of element_line() is
# deprecated as of ggplot2 3.4.0 and raised a warning in the earlier render.
chocolate %>%
  ggplot(aes(x = as.factor(review_date), y = rating)) +
  geom_violin(draw_quantiles = c(0.5), color = "#FF99FF", fill = "#FFCCFF") +
  ggtitle("Distribution of chocolate bar ratings from 0 to 4 by year of\nreview, 2006-2021") +
  labs(caption = "Data from Tidy Tuesday 01-11-2022:\nR4DS Online Learning Community (2023). Tidy Tuesday: A weekly social data project.\nhttps://github.com/rfordatascience/tidytuesday.") +
  xlab("Year of review") +
  ylab("Rating (0 to 4)") +
  theme(
    # text: white on a dark page, serif bold title with extra space below it
    text = element_text(color = "white"),
    plot.title = element_text(margin = margin(0.01, 0, 20, 0), family = "serif", face = "bold"),
    plot.caption = element_text(hjust = 0, color = "white"),
    # axes: rotated year labels and padded axis titles
    axis.text.x = element_text(angle = 270, vjust = 0.5, hjust = 1, color = "white"),
    axis.text.y = element_text(color = "white"),
    axis.title.x = element_text(margin = margin(12, 0, 25, 0)),
    axis.title.y = element_text(margin = margin(10, 10, 10, 5)),
    # panel: lavender grid on a light grey panel
    panel.grid.major = element_line(linewidth = 0.5, linetype = "solid", colour = "lavender"),
    panel.grid.minor = element_line(linewidth = 0.25, linetype = "solid", colour = "lavender"),
    panel.background = element_rect(fill = "gray60"),
    plot.background = element_rect(fill = "gray7"),
    plot.margin = unit(c(0.5, 0.75, 0.5, 0.75), "inches")
  )

  • Added an informative title so it’s clear what is actually being represented by this graph and to aid in interpretation.
  • Added y-axis label and x-axis labels and rotated x-axis labels so they’re less cluttered and easier to read.
  • Added caption with data citation to properly attribute the data used.
  • Added line at median of each plot to make it easier to visually compare average scores across years.
  • Changed line to lavender, text to white, background to dark gray, and panel to light grey to increase visual interest while maintaining color contrast.
  • Increased text spacing, including adding a gap between axis labels and axes and title and plot. The caption was also moved to the bottom of the page since it’s extra information and not a part of the actual plot.
  • Changed title font to make it cuter!